rm(list=ls(all=TRUE))

### This code produces the figures in "Tracking evolving views of the 2024 Atlantic hurricane season with an expert prediction market" by
### Roulston, Toumi, and Kaivanto.


### functions

plot_fanchart = function( x, y, xlim=NULL, ylim=NULL, xlab='', ylab='', border=NA, cols=NULL, 
	mediancol='black', medianlwd=3, cex.axis=1, cex.lab=1, xaxt='n', yaxt='n', main='', cex.main=1 ) {
	# x is x-axis points (n)
	# y is an array of quantiles (n,m)
	if (is.null(xlim)) { xlim = range(x) }
	if (is.null(ylim)) { ylim = range(y) }
	if (is.null(cols)) { cols = c('white','black') }
	pal = colorRampPalette(cols)
	m = ncol(y)
	maxq = floor(m/2)
	colours = pal(maxq+1)
	plot(xlim, ylim, type='n' , xlab=xlab, ylab=ylab, xaxt=xaxt, yaxt=yaxt, cex.lab=cex.lab, cex.axis=cex.axis, main=main, cex.main=cex.main )
	m = ncol(y)
	for (qIndex in c(1:maxq)) {
		polygon( c(x,rev(x)), c(y[,qIndex],rev(y[,m+1-qIndex])), border=border, col=colours[qIndex+1] )
	}
	# if odd number of quantiles plot the central one as a line
	if (m%%2 ==1) { lines( x, y[,maxq+1], col=mediancol, lwd=medianlwd ) } 
	# you can add a date axis with axis.Date
	# axis.Date(1,Day,at=seq(min(x),max(x),by="months"),format="%b-%y")
	# a legend can be added to
	# legend('topleft',legend=c('median','50% interval','90% interval'),lwd=c(3,NA,NA),pch=c(NA,15,15),col=c('black',z$colours[3],z$colours[2]))
	return(list(colours=colours))
}


estimate_quantiles = function(p, x, pout) {	
	### From a distribution given by probabilities p at values x
	### this function estimates the quantiles for the cumulative probabilities
	### pout.
	cPdf = cumsum(p)
	uniq_cPdf = unique(cPdf)
	uniq_indx = match(uniq_cPdf,cPdf)
	q = approx(uniq_cPdf, x[uniq_indx], pout)
	return(list(pout=q$x,qout=q$y))
}


### Analyse CAHM24 date
 
### Read in CAHM24 price data and reformat it.
	filename = "CAHM24Prices.csv"
	cahmdata = read.csv(filename,sep=",")
	dates = as.Date(cahmdata$Day,format="%d/%m/%Y")
	### only use lines where "?" does not appear in prices and trades is a number and date is before Dec 1 2024
	valid = which(grepl("\\?",cahmdata$X0)==FALSE & is.finite(cahmdata$Trades) & dates < as.Date("2024-12-30") & dates > as.Date("2023-05-01")) 
	dates = dates[valid]
	nDates = length(dates)
	prices = array(0,c(nDates,21))
	for (iDate in c(1:nDates)) { prices[iDate,] = as.numeric(cahmdata[valid[iDate],4:24]) }

### Define daily probabilities of hurricane occurrence in the simple expectations model.
	d = as.numeric(difftime(dates,min(dates),units='days'))
	pDaily = dnorm(d,d[dates==as.Date("2024-09-10")],41)
	pDaily[dates < as.Date("2024-06-01") | dates > as.Date("2024-11-30")] = 0.0 ### zero probability outside of official season
	pDaily = 10*pDaily/sum(pDaily)
	
### Read in observed TS and hurricane data
	filename = "HurricaneSeason2024.csv"
	obsdata = read.csv(filename,sep=",")
	obsdata$TROPICAL.STORM.STATUS = as.Date(obsdata$TROPICAL.STORM.STATUS,format="%d/%m/%Y")
	obsdata$HURRICANE.STATUS = as.Date(obsdata$HURRICANE.STATUS,format="%d/%m/%Y")
	TRST_count = array(0,nDates)
	HURR_count = array(0,nDates)
	PROJ_count = array(0,nDates)
	PROJ_count_sd = array(0,nDates)
	for (iDate in c(1:nDates)) {
		TRST_count[iDate] = sum(obsdata$TROPICAL.STORM.STATUS <= dates[iDate])
		HURR_count[iDate] = sum(obsdata$HURRICANE.STATUS <= dates[iDate],na.rm=TRUE)	
		ptmp = pDaily[dates > dates[iDate]]
		PROJ_count[iDate] = HURR_count[iDate] + sum(ptmp)	
		PROJ_count_sd[iDate] = sqrt(sum(ptmp*(1-ptmp)))
	}	
	hurricanes = which(is.finite(obsdata$CATEGORY))

### Read in forecast data
	filename = "HurricaneSeason2024Forecasts.csv"
	fcstdata = read.csv(filename,sep=",")
	fcstdata$DATE = as.Date(fcstdata$DATE,format="%d/%m/%Y")

### Estimate quantiles of price data
	pout = c(0.2,0.5,0.8) 
	quantiles = array(0,c(nDates,length(pout)))
	x = seq(0,20,1) + 0.5
	for (iDate in c(1:nDates)) {
		tmp = estimate_quantiles(prices[iDate,], x, pout)
		quantiles[iDate,] = tmp$qout
	}

### Colour scheme
	col1 = 'grey'
	col2 = 'blue'
	col3 = 'red'
	col4 = 'darkgreen'
	col5 = 'darkred'

### Generate FIGURE 1 (histograms of prices on 4 dates)
	png(filename = "Figure1.png",
    		width = 2400, height = 2400, units = "px", pointsize = 48,
    		bg = "white")
	par(mfrow=c(2,2))
	snapShot = c(as.Date("2023-12-10"),as.Date("2024-06-01"),as.Date("2024-09-08"),as.Date("2024-09-30"))
	nSnapshots = length(snapShot)
	outcomes = c(seq(0,19,1),"20+")
	bar_colours = rep(col1,21)
	bar_colours[12] = col5 ### color the bar for '11 hurricanes' as it is the one that occurred.
	for (iSnapshot in c(1:nSnapshots)) {
		iDate = which(dates == snapShot[iSnapshot])
		barplot(prices[iDate,],col=bar_colours,names.arg=outcomes,cex.names=0.6,las=2,xlab='number of hurricanes',ylab='price',
			main=sprintf('(%s) %s',letters[iSnapshot],format(snapShot[iSnapshot],"%d %B %Y")))
		if (iSnapshot == 1) { 
			legend('topright',legend=c('actual outcome'),pch=15,col=col5,cex=1.5,bty='n')
		}
	}
	dev.off()

### Generate FIGURE 2 (daily probabilities of hurricane formation used in simple model)
	png(filename = "Figure2.png",
    		width = 3200, height = 2400, units = "px", pointsize = 48,
    		bg = "white")
	plot(dates,pDaily,type='l',xaxt='n',xlab='',lwd=4,col=col5,xlim=c(as.Date("2024-06-01"),as.Date("2024-12-30")),
		ylab='daily probability of hurricane formation',cex.lab=1.2,
		main='daily probability of hurricane formation in the simple expectations model',cex.main=1.5)
	axis.Date(1,Day,at=seq(min(dates),max(dates)+14,by="weeks"),format="%d-%b-%y",las=2,cex.axis=0.8)
	dev.off()

### Generate FIGURE 3 (timeseries of CRUCIAL prices)
	png(filename = "Figure3.png",
    		width = 3200, height = 2400, units = "px", pointsize = 48,
    		bg = "white")
	
	lwd = 4
	z=plot_fanchart( dates, quantiles, cols=c('white',col1), ylab='number of storms', ylim=c(0,15), cex.lab=1.4, main="Prices in the CRUCIAL Atlantic Hurricane Market 2024",cex.main=1.5,
		xlim=c(min(dates),max(dates)+14), medianlwd=lwd )
	axis.Date(1,Day,at=seq(min(dates),max(dates)+1,by="weeks"),format="%d-%b-%y",las=2,cex.axis=0.7)
	axis(2,at=seq(0,20),labels=c(seq(0,19),"20+"),cex.axis=1.1,las=2)
	lines(dates,HURR_count,lwd=lwd,col=col2)
	lines(dates,PROJ_count,lwd=lwd,col=col3,lty=1)
	lines(dates,PROJ_count+PROJ_count_sd,lwd=lwd,col=col3,lty=3)
	lines(dates,PROJ_count-PROJ_count_sd,lwd=lwd,col=col3,lty=3)
	k = 1
	for (iHurricane in hurricanes) {
		text(obsdata$HURRICANE.STATUS[iHurricane],k-0.5,sprintf("%s (C%1.0f)",obsdata$STORM[iHurricane],obsdata$CATEGORY[iHurricane]),pos=4,cex=1.2)
		k = k + 1
	}
	points(fcstdata$DATE,fcstdata$HURRICANES,pch=16,col=col4,cex=1.5)
	legend('bottomleft',
		legend=c('median','60% interval','observed hurricanes','simple expectation model','simple model std. dev.','forecasts'),
		lwd=c(lwd,NA,lwd,lwd,lwd,NA),pch=c(NA,15,NA,NA,NA,16),lty=c(1,NA,1,1,3,NA),col=c('black',z$colours[2],col2,col3,col3,col4)
	)
	dev.off()


